Package loading

source("../src/functions.R")

Data loading

count <- read_delim("../data/count.txt.gz", delim="\t")
## 
## ── Column specification ───────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   gene_name = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
expdesign <- read_delim("../data/expdesign.txt.gz", delim="\t", skip=6)
## 
## ── Column specification ───────────────────────────────────────────────
## cols(
##   index = col_double(),
##   sequencing_batch = col_double(),
##   amplification_batch = col_double(),
##   mouse_ID = col_logical(),
##   pool_barcode = col_character(),
##   sample_barcode = col_character(),
##   plate_id = col_double(),
##   well_id = col_character(),
##   number_of_cells = col_character(),
##   sorting_markers = col_character(),
##   RMT_length = col_double(),
##   group_name = col_character(),
##   ERCC_dilution = col_double(),
##   ERCC_volume_ul = col_double(),
##   Column_name_in_processed_data_file = col_character()
## )
## Warning: 3072 parsing failures.
##  row      col           expected actual                       file
## 1527 mouse_ID 1/0/T/F/TRUE/FALSE      4 '../data/expdesign.txt.gz'
## 1528 mouse_ID 1/0/T/F/TRUE/FALSE      4 '../data/expdesign.txt.gz'
## 1529 mouse_ID 1/0/T/F/TRUE/FALSE      4 '../data/expdesign.txt.gz'
## 1530 mouse_ID 1/0/T/F/TRUE/FALSE      4 '../data/expdesign.txt.gz'
## 1531 mouse_ID 1/0/T/F/TRUE/FALSE      4 '../data/expdesign.txt.gz'
## .... ........ .................. ...... ..........................
## See problems(...) for more details.

Wide -> Long

# Merge
# Filtering non single-cells / zero count cells
count %>%
    pivot_longer(-gene_name, names_to="cell", values_to="exp") %>%
        inner_join(expdesign,
            by=c("cell"="Column_name_in_processed_data_file")) %>%
            rename_all(tolower) %>%
                group_by(cell) %>%
                    mutate(sum=sum(exp)) %>%
                            filter(sum != 0 &&
                                number_of_cells == 1 &&
                                group_name %in%
                                    c("B cell", "CD8+pDC",
                                        "monocyte_or_neutrophil", "NK_cell")) %>%
                                    ungroup %>%
                                        arrange(cell) -> marsdata

Long -> Wide

# Extract gene expression part
marsdata %>%
    select(gene_name, cell, exp) %>%
        pivot_wider(names_from="cell", values_from="exp") -> wide_marsdata

# Gene name
wide_marsdata %>%
    select(gene_name) %>%
        data.frame %>%
            .[,1] -> genename

# Expression matrix -> SingleCellExperiment
wide_marsdata %>%
    select(!gene_name) %>%
        as.matrix %>%
            SingleCellExperiment(assays=list(counts=.[])) -> sce_marsdata

# rownames
genename -> rownames(sce_marsdata)

# coldata
marsdata %>%
    select(!c(gene_name, exp)) %>%
        distinct %>%
            arrange(cell) %>%
                DataFrame -> colData(sce_marsdata)

scater

# Analysis workflow of scater
sce_marsdata %>%
    logNormCounts %>%
        runPCA(ntop=2000, ncomponents=10) %>%
            runTSNE(dimred = "PCA") %>%
                runUMAP(dimred = "PCA") -> sce_marsdata

Visualization

Standard plot functions of R

pairs(reducedDim(sce_marsdata, "PCA"),
    col=factor(colData(sce_marsdata)$group_name),
    pch=16, cex=0.5, main="PCA")

plot(reducedDim(sce_marsdata, "TSNE"),
    col=factor(colData(sce_marsdata)$group_name),
    xlab="tSNE1", ylab="tSNE2",
    pch=16, cex=2, main="tSNE")

# Pair/scatter plot (scater)

sce_marsdata %>%
    plotReducedDim(dimred="PCA", colour_by="group_name", ncomponents=10)

sce_marsdata %>%
    plotReducedDim(dimred="TSNE", colour_by="group_name")

sce_marsdata %>%
    plotReducedDim(dimred="UMAP", colour_by="group_name")

Scatter plot (schex)

sce_marsdata %>%
    make_hexbin(nbins=40, dimension_reduction="TSNE") %>%
        plot_hexbin_feature(feature="Cd8a", type="logcounts",
            action="mean", xlab="tSNE1", ylab="tSNE2",
            title=paste0("Mean of Cd8a"))

ggplot2

reducedDim(sce_marsdata, "TSNE") %>%
    cbind(colData(sce_marsdata)$group_name) %>%
        data.frame %>%
            mutate_at(vars(-X3), as.numeric) %>%
            ggplot(aes(x=X1, y=X2, color=X3)) +
                geom_point() +
                    xlab("PC1") +
                        ylab("PC2") +
                            theme(legend.title = element_blank())

ggpairs

reducedDim(sce_marsdata, "PCA") %>%
    cbind(colData(sce_marsdata)$group_name) %>%
        data.frame %>%
            mutate_at(vars(-V11), as.numeric) %>%
            ggpairs(columns=1:10, aes(color=V11))

pairsD3

reducedDim(sce_marsdata, "PCA") %>%
    .[,1:5] %>%
        pairsD3(group=colData(sce_marsdata)$group_name,
            tooltip=colData(sce_marsdata)$cell)

Plotly

reducedDim(sce_marsdata) %>%
    as_tibble %>%
        select(PC1, PC2, PC3) %>%
            data.frame %>%
                plot_ly(x=~PC1, y=~PC2, z=~PC3,
                type = "scatter3d", mode = "markers",
                text = colData(sce_marsdata)$cell,
                color =~colData(sce_marsdata)$group_name
                )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

iSEE

# app <- iSEE(sce_marsdata)
# runApp(app)

Session info

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS/LAPACK: /Users/tsuyusakikouki/opt/anaconda3/envs/r-4.0/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] C/UTF-8/C/C/C/C
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] schex_1.2.0                 Seurat_3.2.2               
##  [3] ggbeeswarm_0.6.0            styler_1.3.2               
##  [5] formatR_1.7                 lintr_2.0.1                
##  [7] knitr_1.30                  BiocStyle_2.16.0           
##  [9] rmarkdown_2.5               pairsD3_0.1.0              
## [11] shiny_1.5.0                 iSEE_2.0.0                 
## [13] plotly_4.9.2.1              GGally_2.0.0               
## [15] scater_1.16.0               SingleCellExperiment_1.10.1
## [17] SummarizedExperiment_1.18.1 DelayedArray_0.14.0        
## [19] matrixStats_0.57.0          Biobase_2.48.0             
## [21] GenomicRanges_1.40.0        GenomeInfoDb_1.24.0        
## [23] IRanges_2.22.1              S4Vectors_0.26.0           
## [25] BiocGenerics_0.34.0         forcats_0.5.0              
## [27] stringr_1.4.0               dplyr_1.0.2                
## [29] purrr_0.3.4                 readr_1.4.0                
## [31] tidyr_1.1.2                 tibble_3.0.4               
## [33] ggplot2_3.3.2               tidyverse_1.3.0            
## 
## loaded via a namespace (and not attached):
##   [1] shinydashboard_0.7.1      reticulate_1.18          
##   [3] tidyselect_1.1.0          htmlwidgets_1.5.2        
##   [5] grid_4.0.3                BiocParallel_1.22.0      
##   [7] Rtsne_0.15                munsell_0.5.0            
##   [9] codetools_0.2-17          ica_1.0-2                
##  [11] DT_0.16                   future_1.19.1            
##  [13] miniUI_0.1.1.1            withr_2.3.0              
##  [15] colorspace_1.4-1          rstudioapi_0.11          
##  [17] ROCR_1.0-11               tensor_1.5               
##  [19] shinyWidgets_0.5.4        listenv_0.8.0            
##  [21] labeling_0.4.2            GenomeInfoDbData_1.2.3   
##  [23] polyclip_1.10-0           farver_2.0.3             
##  [25] rprojroot_1.3-2           vctrs_0.3.4              
##  [27] generics_0.0.2            xfun_0.18                
##  [29] R6_2.4.1                  clue_0.3-57              
##  [31] rsvd_1.0.3                concaveman_1.1.0         
##  [33] rex_1.2.0                 spatstat.utils_1.17-0    
##  [35] bitops_1.0-6              reshape_0.8.8            
##  [37] shinyAce_0.4.1            assertthat_0.2.1         
##  [39] promises_1.1.1            scales_1.1.1             
##  [41] beeswarm_0.2.3            gtable_0.3.0             
##  [43] globals_0.13.1            goftest_1.2-2            
##  [45] processx_3.4.4            rlang_0.4.7              
##  [47] cyclocomp_1.1.0           GlobalOptions_0.1.2      
##  [49] splines_4.0.3             lazyeval_0.2.2           
##  [51] hexbin_1.28.1             broom_0.7.2              
##  [53] abind_1.4-5               reshape2_1.4.4           
##  [55] BiocManager_1.30.10       yaml_2.2.1               
##  [57] modelr_0.1.8              crosstalk_1.1.0.1        
##  [59] backports_1.1.10          httpuv_1.5.4             
##  [61] tools_4.0.3               ellipsis_0.3.1           
##  [63] RColorBrewer_1.1-2        ggridges_0.5.2           
##  [65] Rcpp_1.0.5                plyr_1.8.6               
##  [67] zlibbioc_1.34.0           RCurl_1.98-1.2           
##  [69] ps_1.4.0                  deldir_0.1-29            
##  [71] rpart_4.1-15              pbapply_1.4-3            
##  [73] GetoptLong_1.0.4          viridis_0.5.1            
##  [75] cowplot_1.1.0             zoo_1.8-8                
##  [77] haven_2.3.1               ggrepel_0.8.2            
##  [79] cluster_2.1.0             fs_1.5.0                 
##  [81] magrittr_1.5              RSpectra_0.16-0          
##  [83] data.table_1.13.2         circlize_0.4.10          
##  [85] colourpicker_1.1.0        lmtest_0.9-38            
##  [87] reprex_0.3.0              RANN_2.6.1               
##  [89] fitdistrplus_1.1-1        hms_0.5.3                
##  [91] patchwork_1.0.1           shinyjs_2.0.0            
##  [93] mime_0.9                  evaluate_0.14            
##  [95] xtable_1.8-4              readxl_1.3.1             
##  [97] gridExtra_2.3             shape_1.4.5              
##  [99] compiler_4.0.3            KernSmooth_2.23-17       
## [101] crayon_1.3.4              entropy_1.2.1            
## [103] htmltools_0.5.0           mgcv_1.8-33              
## [105] later_1.1.0.1             lubridate_1.7.9          
## [107] DBI_1.1.0                 tweenr_1.0.1             
## [109] dbplyr_1.4.4              ComplexHeatmap_2.4.2     
## [111] MASS_7.3-53               Matrix_1.2-18            
## [113] cli_2.1.0                 igraph_1.2.6             
## [115] pkgconfig_2.0.3           xml2_1.3.2               
## [117] vipor_0.4.5               XVector_0.28.0           
## [119] rvest_0.3.6               callr_3.5.1              
## [121] digest_0.6.26             sctransform_0.3.1        
## [123] RcppAnnoy_0.0.16          spatstat.data_1.4-3      
## [125] cellranger_1.1.0          leiden_0.3.4             
## [127] rintrojs_0.2.2            uwot_0.1.8               
## [129] DelayedMatrixStats_1.10.0 rjson_0.2.20             
## [131] lifecycle_0.2.0           nlme_3.1-149             
## [133] jsonlite_1.7.1            BiocNeighbors_1.6.0      
## [135] desc_1.2.0                viridisLite_0.3.0        
## [137] fansi_0.4.1               pillar_1.4.6             
## [139] lattice_0.20-41           fastmap_1.0.1            
## [141] httr_1.4.2                survival_3.2-7           
## [143] glue_1.4.2                remotes_2.2.0            
## [145] FNN_1.1.3                 spatstat_1.64-1          
## [147] png_0.1-7                 ggforce_0.3.2            
## [149] stringi_1.5.3             blob_1.2.1               
## [151] BiocSingular_1.4.0        irlba_2.3.3              
## [153] future.apply_1.6.0